import pymysql
import requests
from lxml import etree
# pymysql is needed to save the scraped data into a local MySQL database
# pip install pymysql

# Zhihu "hot" page that the script fetches.
url = "https://www.zhihu.com/hot"

# HTTP request headers sent along with the page fetch.
# NOTE(review): the Cookie below is a hard-coded logged-in session. It should
# be treated as a leaked credential (rotate it) and loaded from an environment
# variable or an untracked config file instead of living in source control.
headers = {
    # User-Agent carries the browser's identity information.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
    # Cookie carries the logged-in user's identity information.
    # The value is written as two adjacent string literals inside parentheses:
    # a quoted literal cannot contain a raw line break (it is a SyntaxError),
    # and an HTTP header value must not contain a newline either. Python joins
    # adjacent literals at compile time, so the result is one single-line value.
    'Cookie': (
        '_zap=07e29061-f2ed-4494-994e-3ef5d7366221; d_c0=AaAVVp5S2BePTnoj88kNZeaCB9SEdAhf4sw=|1702434163; _xsrf=QVYQPC4AEjYitpyxWzw2LEFUusi4EAEu; __snaker__id=IZIdmP42VTf1zCZV; gdxidpyhxdE=fOpMH%5CbY1ndbbvQ20kgab2tSoHdTLGjDZRLGyed3GNvHqv%2BUJui%2Fme8SjeVTWxT7X2kg%5CUh5AVvw348UxZWQqla46PN55%5CfY88iSdX2osZp%2BlK83PeH21EKp11DjDtrtyh%5C%2FCSg5gqOa6v91lJRNcA%5CClmKxwql%2F99agaW7kJ7UjDcaQ%3A1714960322702; captcha_session_v2=2|1:0|10:1714959465|18:captcha_session_v2|88:RWdoSmUvRFZZSWg0aU5XOFBPaUF3RzdpWjNnNW5zUTRGTVJPRDlFa3QrT1lKT2tIY2dHUUJrL2tMVXptL1NWMw==|206a75aa40179bd0ada8a920bc5166637b34b402e4f117418f8fe51b5f2530e8; captcha_ticket_v2=2|1:0|10:1714959471|17:captcha_ticket_v2|728:eyJ2YWxpZGF0ZSI6IkNOMzFfcHUyQmtpYVlLQUJxdnc0Sldlc2NwQXNONWpDNXlYdm05OGQqYVhWdWVJelcud2trY05aekVlaCpLTFlxamhfcXM5QW9NUUN6Q1Rwei5id2RSdHdJaWNqKnNpdjFyd1dkeVVwbTk4WFFEKm05NnZFOG1rWlFuSFVqcExfS08xT3RHbktES3dHVF9veGFHeWFpRV9MKms1SXdUdXVDVE1lSHRvb3RsdzJBY3hrazlBUmJ2dnh1MXMucGxlQjRPeGxlOVBRZXVnM0U5bk1RVE1HMTBRT3IuQ2lBdWsubTBrKm1RbWRRbWhqT2FoUmpJdC5qVS5vd3JyNno5Z2xsMjViYVVTVWJFTFV3YVljQ2FfMnRrWFNFQWh3STFPZTFDLlFIOXV6bHJ3VG1wZmo5Q0hkTVBSa1FXSE1kRUFhV2gwaFJlSGRVenMxWVJyYVNnZGluWUZlS2lGWUpmMGI2Sm50VVVPYjk1d05UQjl5MGUxUHliRnBoalVNWHlPRXJ1elp2dnFPdGNMQnkwYWlzYVg2UmNhZGxYbE1vbFlkZ3VFSFlLWlRfeERzaG1DYlhHUzEzM2YwQ25UY0RMSU53RXJYXzlzME5CVVpmTGU4cFJGLnIuRW80a29nVFhhWlZfSm1PUm0zc0g4Mk9PMWJoWHd4LlZETWs5RVZ0U0ozKlA0TDZ0cFdqcFk3N192X2lfMSJ9|c1d0f3421147842693bf05eb803e2294976114064523222e173dc4ad7451bf3e; z_c0=2|1:0|10:1714959492|4:z_c0|92:Mi4xMnZNaENBQUFBQUFCb0JWV25sTFlGeVlBQUFCZ0FsVk5oSUlsWndEazJwSzVBTV9QejRZeVo1UjM0RFFoU2VzQmpn|83a513cc0284976929a3c4061081fb22b608cab625a035ef42d1f39e4499cb25; q_c1=cc6057e27b0b4eb4bb9f9076769f1a12|1714959492000|1714959492000; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1714100588,1714959420,1714965525,1714975830; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1714975830; tst=h; SESSIONID=G0JI3FN7YCfjzVfbuBMeJPN9KG4N24uK77x6avCz1eD; '
        'JOID=VlsWA0ydIr-uk80UapnGJPoT45Z13RL24KOMWl7HZtnH7ph1CMZVg8yXyhduZP1FTEK_sfrnm9-FQIOt_OlzUhY=; osd=Vl0SA0ydJLuuk80SbpnGJPwX45Z12xb24KOKXl7HZt_D7ph1DsJVg8yRzhduZPtBTEK_t_7nm9-DRIOt_O93UhY=; KLBRSID=ca494ee5d16b14b649673c122ff27291|1714976306|1714975820'
    ),
}
# Fetch the page. An explicit timeout keeps the script from hanging forever on
# a stalled connection (requests applies no timeout unless one is given).
resp = requests.get(url, headers=headers, timeout=10)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page and
# silently ending up with nothing to store.
resp.raise_for_status()

# Parse the HTML and pull out two parallel lists from the same set of <a>
# nodes: the text of each <h2> title and the link's href attribute.
tree = etree.HTML(resp.text)
title_list = tree.xpath("//*[@id='TopstoryContent']/div/div[2]/div[1]/section/div[2]/a/h2/text()")
url_list = tree.xpath("//*[@id='TopstoryContent']/div/div[2]/div[1]/section/div[2]/a/@href")

# Pair each title with its URL. zip() stops at the shorter list, so an
# unmatched trailing title or href is dropped rather than mispaired.
hot_news = list(zip(title_list, url_list))
# Release the HTTP response before doing any database work; its body has
# already been parsed into hot_news, so nothing below needs it.
resp.close()

# Store the scraped (title, url) pairs in the MySQL table `zhihu`.
# NOTE(review): the connection credentials are hard-coded; consider loading
# them from the environment or a config file.
conn = pymysql.connect(host="127.0.0.1", port=3306, user="root", password="123456", database="dz_python")

# Parameterised statement: the driver escapes the values, so titles containing
# quotes or other special characters cannot break the SQL.
sql = "INSERT INTO zhihu(news_name,news_url) VALUES(%s,%s)"

try:
    # The cursor is the handle used to send statements to the server; the
    # `with` block closes it even if the insert raises.
    with conn.cursor() as cursor:
        # Insert every row in one call rather than one execute() per row.
        # An empty hot_news list is a no-op.
        cursor.executemany(sql, hot_news)
    # Commit once, after all rows went in, so the batch is stored atomically:
    # either every row is saved or (on an error) none are.
    conn.commit()
finally:
    # Always release the connection. If an error occurred before commit(),
    # closing discards the uncommitted rows.
    conn.close()

